* ---------------------------------------------------------------------------
* Session setup: output paging, matrix size, log file, project ado directory,
* and the county-level voting data.
* ---------------------------------------------------------------------------
set more off
set matsize 10000

capture log close
log using "Gentzkow_et_al_rho.log", replace

* Put the "ado" sub-folder of the current working directory at the front of
* the ado search path, then list the search path to check it.
global rootdir : pwd
global adodir "$rootdir/ado"
adopath ++ "$adodir"
adopath

* The .dta file is read from the current working directory; edit the path
* below if the data are stored elsewhere.
use "voting_cnty_clean.dta", clear

/*a. Going from the first-difference sample to the panel (levels) sample in
Gentzkow et al., and creating variables.
   sample        = 1 for rows kept in the panel sample
   tminus1sample = 1 for rows that enter the panel sample only because the
                   following row of the same county is in it */
 
gen sample=0
gen tminus1sample=0
forvalue i=1872(4)1928 {
* Rows of the current year that belong to the main sample
replace sample=1 if (year==`i')&mainsample==1
* The [_n+1] look-ahead below relies on rows being ordered by county and year
sort cnty90 year
* Rows dated four years earlier, not yet in the sample, whose next row is for
* the same county and is in the sample. NOTE(review): the year of the next row
* is not checked; presumably it is the current loop year - confirm.
replace tminus1sample=1 if sample==0&sample[_n+1]==1&cnty90==cnty90[_n+1]&year==`i'-4
* Bring those earlier rows into the sample as well
replace sample=1 if sample[_n+1]==1&cnty90==cnty90[_n+1]&year==`i'-4
}
* Cross-tabulate the constructed flag against mainsample, then keep only
* flagged rows; the flag itself is no longer needed afterwards
tab sample mainsample 
keep if sample==1
drop sample

* Lags, first differences and leads of the treatment (numdailies) and the
* outcome (prestout), built with 4- and 8-year time-series operators on the
* county-by-year panel.

xtset cnty90 year

* Levels four years earlier
generate numdailies_l1 = L4.numdailies
generate prestout_l1   = L4.prestout

* Four-year changes
generate changedailies  = numdailies - numdailies_l1
generate changeprestout = prestout - prestout_l1

* Lagged and forwarded four-year changes
generate changedailies_l1  = L4.changedailies
generate changeprestout_l1 = L4.changeprestout
generate changedailies_l2  = L8.changedailies
generate changedailies_for = F4.changedailies

* One indicator per value of st (st1, st2, ...), to be used as controls

quietly tabulate st, generate(st)

* Restrict to the 1868-1928 window (rows with a missing year are dropped too)
keep if inrange(year, 1868, 1928)

/* b. First-difference (FD) and fixed-effects (FE) regressions. The estimates
 are stored in scalars; those reported are displayed at the end of the program */

* FD: change in outcome on change in treatment within the main sample,
* absorbing styr, with standard errors clustered on cnty90
areg changeprestout changedailies if mainsample, absorb(styr) vce(cluster cnty90)
scalar betafd = _b[changedailies]
scalar se_fd  = _se[changedailies]
scalar N_fd   = e(N)

* FE: outcome on treatment in levels, absorbing cnty90, with year indicators
* and one indicator per value of styr (styr1-styr666) as controls
tabulate styr, generate(styr)
areg prestout i.year numdailies styr1-styr666, absorb(cnty90) vce(cluster cnty90)
scalar betafe = _b[numdailies]
scalar se_fe  = _se[numdailies]
scalar N_fe   = e(N)

/* Do FE and FD give significantly different results? (Bootstrap test, currently disabled.)
set seed 1
scalar diff_fe_fd=betafd-betafe
matrix A=0

forvalue i=1/100{
preserve
bsample, cluster(cnty90)
areg changeprestout changedailies if mainsample, absorb(styr)
scalar beta2=_b[changedailies]
areg prestout i.year numdailies styr1-styr666, absorb(cnty90)
matrix A=A\beta2,_b[numdailies]
restore
}

preserve
drop _all
svmat A
drop if _n==1
gen A3=A1-A2
sum A3
scalar se_diff_fe_fd = r(sd)
scalar t_st_diff_fe_fd = diff_fe_fd/r(sd)
export delimited using "fd_fe_bs.csv", replace
restore
*/


/* c. Prepare the variables needed before computing the weights attached to the
 first-difference and fixed-effects regressions */
quietly {
    * Keep only the variables used from here on
    keep prestout numdailies changeprestout changedailies cnty90 styr ///
        styr1-styr666 year tminus1sample mainsample st st1-st48

    /* Rows flagged by tminus1sample are in the sample in levels but not in the
    first-difference sample, so for the weights computation their outcome and
    treatment changes are set to missing. The untouched values are needed for
    the placebos and are therefore copied first into changeprestout_placebo and
    changedailies_placebo. */
    foreach v in changeprestout changedailies {
        generate `v'_placebo = `v'
        replace `v' = . if tminus1sample == 1
    }
}
 
// f. Computation of the DID_M

/* Define the "super groups" for each year: the sign of the change in the
   number of newspapers (+1 increase, 0 no change, -1 decrease), plus two
   dummies for increase and decrease. The final sample consists of the
   observations for which that change (and the change in the outcome) can be
   computed, together with the preceding row of the same county. */
   

xtset cnty90 year
gen G_T=.
gen sample=.
forvalue i=1872(4)1928 {
* Sign of the treatment change for rows of the current year that are in the
* main sample and have both changes non-missing; missing everywhere else
gen group`i'=(changedailies>0)-(changedailies<0) if (year==`i')& ///
	changedailies !=. & mainsample==1 & changeprestout !=.
	
* Collect the year-specific group into the single variable G_T and flag the row
replace G_T=group`i' if (year==`i')&changedailies!=.&mainsample==1&changeprestout!=.
replace sample=1 if (year==`i')&changedailies!=.&mainsample==1&changeprestout!=.
* Increase and decrease dummies, defined only where the group is defined
gen group`i'_increase=(group`i'>0) if (year==`i')&group`i'!=.
gen group`i'_decrease=(group`i'<0) if (year==`i')&group`i'!=.
* The [_n+1] look-ahead below relies on rows being ordered by county and year
sort cnty90 year
* Copy group, sample flag and dummies from the next row of the same county
* onto the row dated four years earlier. NOTE(review): the year of the next
* row is not checked; presumably it is the current loop year - confirm.
replace group`i'=group`i'[_n+1] if cnty90==cnty90[_n+1]&year==`i'-4
replace sample=1 if cnty90==cnty90[_n+1]&year==`i'-4&sample[_n+1]==1
replace group`i'_increase=group`i'_increase[_n+1] if cnty90==cnty90[_n+1]&year==`i'-4
replace group`i'_decrease=group`i'_decrease[_n+1] if cnty90==cnty90[_n+1]&year==`i'-4
* Indicator for the current year
gen year`i'=(year==`i')
}
  
* Keep flagged rows only, then build the group four years ahead (G_T_for) and
* print a few checks on the resulting sample
keep if sample==1
tab year
gen G_T_for=f4.G_T
sum sample G_T G_T_for
tab G_T

* Point estimate of the DID_M (again, results are displayed at the end of the program)

* Drop automatically loaded programs from memory so that ado-files are re-read
* from the current ado path
discard

set seed 1
* fuzzydid is not a built-in command; presumably it is provided by the ado
* directory added at the top of this file - confirm. Option meanings are not
* visible here; nose presumably suppresses the command's own standard errors,
* which would be consistent with the commented-out e(se_LATE) line below and
* with se_did_m being taken from the bootstrap further down.
fuzzydid prestout G_T G_T_for year numdailies, tc newcateg(0 1 2 1000) ///
		qualitative(st1-st48) nose
		
* First element of the returned e(b_LATE) matrix is used as the DID_M estimate
scalar did_m=el(e(b_LATE),1,1)
* scalar se_did_m=el(e(se_LATE),1,1)
scalar N_did_m=e(N)

* Bootstrap comparison of the DID_M with the first-difference (FD) estimator
* of Gentzkow et al.: 100 resamples of whole counties, re-estimating both
* estimators on each resample.

set seed 1
scalar diff_did_m_fd = did_m - betafd

* Start from a 1 x 2 row of zeros so that each draw can be stacked underneath
matrix A = (0, 0)

forvalues rep = 1/100 {
    preserve
    * Resample counties (clusters) with replacement
    bsample, cluster(cnty90)
    fuzzydid prestout G_T G_T_for year numdailies, tc newcateg(0 1 2 1000) qualitative(st1-st48) nose
    scalar DIDM_bs = el(e(b_LATE), 1, 1)
    areg changeprestout changedailies if mainsample, absorb(styr)
    scalar betafd_bs = _b[changedailies]
    * Append this draw: column 1 = DID_M, column 2 = FD
    matrix A = A \ (DIDM_bs, betafd_bs)
    restore
}

* Turn the matrix of bootstrap draws into a temporary dataset (A1 and A2 are
* the two columns of A), compute the bootstrap statistics, and save the draws.
preserve
drop _all
svmat A
* The first row of A is discarded before computing any statistic
drop in 1

* Standard deviation of the first column
summarize A1
scalar se_did_m = r(sd)

* Difference between the two columns: its standard deviation and the
* corresponding t-statistic for the point-estimate difference
generate A3 = A1 - A2
summarize A3
scalar se_diff_did_m_fd = r(sd)
scalar t_st_diff_did_m_fd = diff_did_m_fd / r(sd)

* Correlation between the two columns
correlate A1 A2
scalar rho_did_m_fd = r(rho)

export delimited using "did_m_fd_bs.csv", replace
restore

* Reload the bootstrap draws from the CSV file (this replaces the data in
* memory) and recompute, from the saved draws, the standard deviation of A1
* and the correlation between A1 and A2.
import delimited using "did_m_fd_bs.csv", clear    case(preserve)
sum A1
scalar se_did_m=r(sd)
 
corr A1 A2
scalar rho_did_m_fd = r(rho)

* display prints consecutive arguments back to back, so an explicit " " is
* placed between the estimate and its standard error; without it the two
* numbers are fused into one unreadable string of digits.
display "FD estimator and standard error is " betafd " " se_fd 

display "DID_M estimator and standard error is " did_m " " se_did_m 
display "The correlation between DID_M and FD is " rho_did_m_fd

* remove the added ado path
adopath - "$adodir" 
* verify the path
adopath
log close
